Load data and packages

reference SCLC multiome analysis.Rmd for in depth description of sclc RDS.

sclc <- readRDS('/Users/smgroves/Box/multiome_data/M1/outs/TKO.rds')
sclc
## An object of class Seurat 
## 467092 features across 8483 samples within 6 assays 
## Active assay: ATAC (195588 features, 195588 variable features)
##  5 other assays present: RNA, chromvar, SCT, peaks, gene_activity
##  3 dimensional reductions calculated: lsi, umap, pca

Run ParetoTI on SCTransformed scaled.data (via the PCA projection from Debbie’s analysis)

# x <- sclc@assays$SCT@scale.data
x_pca <- read.csv('../data/pca-embedding.csv',header = TRUE,row.names = 1)
x_pca <- t(x_pca)
x_pca <- x_pca[1:20,] #keep only top 18 PCs
loadings<- read.csv('../data/pca-feature-loadings-projected.csv', header = TRUE, row.names = 1)
loadings <- as.matrix(loadings)
##################################
load("../data/arc.Robj", verbose=TRUE)
## Loading objects:
##   arc
load("../data/arc_ks.Robj", verbose=TRUE)
## Loading objects:
##   arc_ks
# Fit 5 archetypes
# arc <- fit_pch((x_pca), noc = 5)
# # Fit 5 archetypes with bootstrapping for robustness 
# arc_rob = fit_pch_bootstrap(x_pca, n = 200, sample_prop = .8, seed = 2543, delta = 1,
#                             noc = 5)
# arc_ave <- average_pch_fits(arc_rob)
# 
# 
# 
# arc_ks = ParetoTI::k_fit_pch(x_pca, ks = 2:8, check_installed = T,
#                    bootstrap = T, bootstrap_N = 200, maxiter = 1000,
#                    bootstrap_type = "m", seed = 2543, 
#                    volume_ratio = "t_ratio", # set to "none" if too slow
#                    delta=0, conv_crit = 1e-04, order_type = "align",
#                    sample_prop = 0.75)
# 
# # Show variance explained by a polytope with each k (cumulative)
ParetoTI::plot_arc_var(arc_ks, type = "varexpl", point_size = 2, line_size = 1.5) + theme_bw()

# 
# 
# save(arc, file="../data/arc.Robj")
# save(arc_ks, file="../data/arc_ks.Robj")

Plot UMAP of data for reference

## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following objects are masked from 'package:ensembldb':
## 
##     filter, select
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Plot PCA of archetypes

It looks like ciliated cells are dominating PC2, so we are going to remove those cells and just look at the four archetypes defining the other cell types.

sclc_small <- subset(x = sclc, subset = cluster != "Cilliated cells")

# write.csv(sclc@assays$SCT@scale.data, '../data/scaled-SCT-data.csv')
# write.csv(sclc@reductions$pca@cell.embeddings, '../data/pca-embedding.csv')
# write.csv(sclc@reductions$pca@feature.loadings, '../data/pca-feature-loadings.csv')
# 
# 
# write.csv(sclc@reductions$pca@feature.loadings.projected, '../data/pca-feature-loadings-projected.csv')
DefaultAssay(sclc_small) <- "RNA"
# store mitochondrial percentage in object meta data
sclc_small <- PercentageFeatureSet(sclc_small, pattern = "mt-", col.name = "percent.mt")

# run sctransform
# BiocManager::install("glmGamPoi")
library(glmGamPoi)
sclc_small <- SCTransform(sclc_small, vars.to.regress = "percent.mt", verbose = FALSE,method = "glmGamPoi")

# These are now standard steps in the Seurat workflow for visualization and clustering

sclc_small <- Seurat::ScaleData(object = sclc_small)

sclc_small <- Seurat::RunPCA(object = sclc_small)
sclc_small <- Seurat::ProjectDim(sclc_small, reduction = 'pca')
## PC_ 1 
## Positive:  Mecom, Scgb1a1, Col23a1, Rad51b, Atp11a, Ptpn13, Samd4, Prdm16, Runx1, Nckap5 
##     Rai14, Eya2, Lrmda, Por, Sfta3-ps, Ahnak, Il18r1, Slc4a5, Nfia, Myo5c 
## Negative:  Ptprt, Meis2, Nrxn1, Piezo2, Cdh13, Cacna1a, Nlgn1, Celf4, Slc8a1, Ctnna2 
##     mt-Nd1, Tcerg1l, Ptprn2, Zfpm2, Col8a1, Syt1, Pclo, Cntnap5b, Cacna2d1, Dscaml1 
## PC_ 2 
## Positive:  Pbx1, 2610035D17Rik, Fndc3b, Dnaja1, Cdh1, Hsp90aa1, Gpr158, Egfr, Ralgapa2, Snap25 
##     Tenm4, Malat1, Hsph1, Chka, Shroom3, Neat1, Hspa4l, Slc18a1, Rgs7, Anxa1 
## Negative:  mt-Cytb, mt-Nd1, mt-Nd2, Cplx2, mt-Nd6, Gm42418, Sftpc, Lyz2, Hist1h1e, Nkain3 
##     Sftpa1, Adam19, Ddc, Fli1, Slc1a3, Ldb2, Eng, Hc, Adgrl3, Ptprb 
## PC_ 3 
## Positive:  Hsp90aa1, Hsp90ab1, Hsph1, Dnaja1, Hspa1a, Hspa1b, Hspa8, Hspd1, Bag3, Hspb1 
##     Megf9, Mfsd2a, Utp14b, Rc3h2, Scg2, Dnajb1, Ugcg, Fam107b, Hsp90b1, Hspe1 
## Negative:  Col8a1, Nrxn1, Malat1, Rad51b, Pde4d, 9030622O22Rik, Nlgn1, Atp8a1, Sfta3-ps, Mecom 
##     Ctnna2, Grid2, Agbl4, Pbx1, Lrmda, Kcnip1, Fos, Tcerg1l, Ptprn2, Col23a1 
## PC_ 4 
## Positive:  Mki67, Top2a, Cenpe, Cenpf, Kif15, Knl1, Kif20b, Aspm, Hmgb2, Tpx2 
##     Hist1h1e, Hist1h1d, Anln, Smc4, Cdca2, Nusap1, Clspn, Incenp, Hist1h1a, Hist1h1c 
## Negative:  Gpr158, Ralgapa2, Dscaml1, Dlgap1, Egfr, Ptprn, Tenm4, 2610035D17Rik, Fndc3b, Celf4 
##     Rps6ka2, Tmtc1, Phldb2, Cntnap5b, Pbx1, March11, Prkca, Oxr1, Cdh13, Ank3 
## PC_ 5 
## Positive:  Sftpc, Flt1, Fendrr, Afap1l1, Slc34a2, Adgrl3, Cd36, Ldb2, Prex2, Eng 
##     Hc, Sftpa1, Ptprb, Calcrl, Hmcn1, Lyz2, Sftpb, Col4a1, Pecam1, Bmp1 
## Negative:  Hsp90aa1, Hsp90ab1, Dnaja1, Hsph1, Hspa1a, Hspa1b, Dnajb1, Ubc, Hspd1, Hspa8 
##     Ptprj, Slc4a4, Itprid1, Ptpn13, Col15a1, Por, Samd4, Large1, Hspb1, B4galt5
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.SCT.pca; see ?make.names for more details on
## syntax validity
x_pca_small <- sclc_small@reductions$pca@cell.embeddings
x_pca_small <- t(x_pca_small)
x_pca_small <- x_pca_small[1:20,] #keep only top 18 PCs
loadings<- sclc_small@reductions$pca@feature.loadings.projected
loadings <- as.matrix(loadings)
##################################

# Fit 4 archetypes
arc <- fit_pch((x_pca_small), noc = 4)
## Warning: Python '/Users/smgroves/Library/r-miniconda/envs/reticulate_PCHA/
## bin/python' was requested but '/Users/smgroves/Documents/anaconda3/envs/
## reticulate_PCHA/bin/python3.7' was loaded instead (see reticulate::py_config()
## for more information)
# Fit 4 archetypes with bootstrapping for robustness
arc_rob = fit_pch_bootstrap(x_pca_small, n = 200, sample_prop = .8, seed = 2543, delta = 1,
                            noc = 4)
# arc_ave <- average_pch_fits(arc_rob)
# 
# 
# 
# arc_ks = ParetoTI::k_fit_pch(x_pca_small, ks = 2:8, check_installed = T,
#                    bootstrap = T, bootstrap_N = 200, maxiter = 1000,
#                    bootstrap_type = "m", seed = 2543,
#                    volume_ratio = "t_ratio", # set to "none" if too slow
#                    delta=0, conv_crit = 1e-04, order_type = "align",
#                    sample_prop = 0.75)

# Show variance explained by a polytope with each k (cumulative)
# ParetoTI::plot_arc_var(arc_ks, type = "varexpl", point_size = 2, line_size = 1.5) + theme_bw()


p_pca = plot_arc(arc_data = arc, data = x_pca_small, 
                 which_dimensions = 1:3, line_size = 1.5,
                  data_lab = as.character(Seurat::Idents(sclc_small)),
                 colors = palette(rainbow(20)),
                 text_size = 60, data_size = 4) 
plotly::layout(p_pca, title = "Average Archetypes for Top 20 PCs")
#